package com.ml.room.terrace.pipeline;

import cn.hutool.core.collection.CollectionUtil;
import com.alibaba.fastjson.JSON;
import com.ml.room.dao.IBuildingRecordDao;
import com.ml.room.dao.ISysCollarConfigDao;
import com.ml.room.repository.entity.BuildingRecord;
import com.ml.room.repository.entity.SysCollarConfig;
import com.ml.room.service.IBuildingRecordService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * WebMagic pipeline for the building-record (housing filing) crawl of the
 * "Yunfang" platform: persists the records extracted from one crawled page.
 *
 * <p>For each page the pipeline reads two entries from the {@link ResultItems}:
 * <ul>
 *   <li>{@code "isRepeat"} - flag marking the page as already-seen data;</li>
 *   <li>{@code "buildingRecords"} - the {@link BuildingRecord}s extracted from the page.</li>
 * </ul>
 * New records are batch-saved, and the page counter stored in the
 * {@link SysCollarConfig} row is advanced whenever a page has been dealt with
 * (saved or recognised as a duplicate), so it can serve as the crawl checkpoint.
 *
 * @author IDai
 * @since 2021-08-26
 */
@Component
public class BuildingRecordPagePipeline implements Pipeline {
    /** Logger for this pipeline. */
    private static final Logger logger = LoggerFactory.getLogger(BuildingRecordPagePipeline.class);
    /** Primary key of the {@link SysCollarConfig} row whose page counter this pipeline advances. */
    private static final String COLLAR_CONFIG_ID = "1";

    @Autowired
    private ISysCollarConfigDao iSysCollarConfigDao;
    @Autowired
    private IBuildingRecordService buildingRecordService;

    /**
     * Stores the records of one crawled page and advances the crawl checkpoint.
     *
     * <ul>
     *   <li>Page flagged as repeated: nothing is saved, the page counter is advanced.</li>
     *   <li>Page with records: the records are batch-saved, then the page counter is advanced.</li>
     *   <li>Page without records: only a log line is written; the counter is left untouched
     *       because an empty page most likely means the crawl has reached the end.</li>
     * </ul>
     *
     * @param resultItems extraction result of the page; expected to carry the
     *                    {@code "isRepeat"} and {@code "buildingRecords"} entries
     * @param task        the crawl task that produced the page (not used here)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        logger.info("【合肥新房】 - 房屋备案 - 待入库的数据是，resultItems：{}", JSON.toJSONString(resultItems));

        // Read the flag as a wrapper type: a missing entry yields null, and unboxing it
        // straight into a primitive would throw a NullPointerException. A missing flag
        // is treated as "not repeated".
        Boolean repeatFlag = resultItems.get("isRepeat");
        if (Boolean.TRUE.equals(repeatFlag)) {
            logger.info("【合肥新房】 - 房屋备案 - 待入库的数据是，buildingRecords：{}", "此列表数据重复！");
            // A duplicate page is still a finished page: move the checkpoint forward.
            advanceCollarPage();
            return;
        }

        Set<BuildingRecord> buildingRecords = resultItems.get("buildingRecords");
        if (CollectionUtil.isNotEmpty(buildingRecords)) {
            // Persist the whole page in one batch, then mark the page as done.
            buildingRecordService.saveBatch(buildingRecords);
            advanceCollarPage();
        } else {
            // An empty list most likely means every page has been crawled already.
            logger.info("【合肥新房】 - 房屋备案 - 待入库的数据是，buildingRecords：{}", "数据全部抓取完成！");
        }
    }

    /**
     * Increments the page counter of the crawl configuration row by one and writes it back.
     *
     * <p>The row is loaded right before it is updated, so it is only queried when an
     * update is actually needed. If the row does not exist, a warning is logged and the
     * counter is left unchanged instead of failing with a NullPointerException.
     */
    private void advanceCollarPage() {
        SysCollarConfig sysCollarConfig = iSysCollarConfigDao.selectById(COLLAR_CONFIG_ID);
        if (sysCollarConfig == null) {
            logger.warn("【合肥新房】 - 房屋备案 - 未找到系统配置，id：{}，页码未更新！", COLLAR_CONFIG_ID);
            return;
        }
        sysCollarConfig.setCollarPage(sysCollarConfig.getCollarPage() + 1);
        iSysCollarConfigDao.updateById(sysCollarConfig);
    }
}
